#!/usr/bin/python
from hashdb_output import log, VERBOSE, QUIET, DEBUG, DEFAULT
from hashdb_config_base import CombineDB
from hashdb_walk import Walker, PREFIX_SKIP
from hashdb_db import HashDatabase, HashDatabase2
from hashdb_hash import build_hash
from hashdb_config import parse_config
from hashdb_config_base import ConfigSettings
from hashdb_mntent_wrapper import MountEntries, MountEntry
import stat
import time
import locale
import os


# Batch-commit thresholds used by AppHashDB.hash(): a database commit is
# issued once either threshold is exceeded.  (The "THRESHHOLD" spelling is
# kept as-is; renaming would break the code that references these names.)
THRESHHOLD_TIMEDELTA   =  5.0 # Measured in seconds
THRESHHOLD_CHANGEDELTA = 1000 # Measured in sqlite connection total_changes

class AppHashDB(object):
    _defaults = {}

    def __init__(self, settings=None, cmdline=False):
        """Initialise the application.

        settings -- optional mapping of overrides merged over the default
                    ConfigSettings before parsing.
        cmdline  -- forwarded to parse_config(); when true the command line
                    is consulted as well.
        """
        super(AppHashDB, self).__init__()

        self._settings = ConfigSettings()
        if settings:
            self._settings.update(settings)
        self._settings = parse_config(self._settings, cmdline)

    @property
    def settings(self):
        """The ConfigSettings built by __init__ via parse_config() (read-only)."""
        return self._settings

    def run(self):
        """Dispatch to the run_* handler matching the configured sub-command.

        Returns the handler's result, or None for an unrecognised command.
        """
        cmd = self.settings.cmd
        if cmd in ('hash', 'match', 'view', 'query', 'schema'):
            # Look the handler up lazily, exactly as the explicit attribute
            # access would (run_query may be provided by a subclass).
            return getattr(self, 'run_' + cmd)()

    def run_schema(self):
        """Print every line produced by schema() at the default log level."""
        for line in self.schema():
            log.default(line)

    def schema(self):
        """Yield a textual dump of the database schema.

        Emits a comment listing the user tables, then each table's CREATE
        statement, then the attached database list.  Yields nothing if the
        database cannot be opened.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug('%s' % self.settings)

        # Setup database
        log.debug('* setup database\'s...')
        db = HashDatabase2(self.settings.database)
        if not db.open():
            return

        # Fetch the user tables once (the original ran this identical query
        # twice); sqlite's internal tables are excluded by name prefix.
        tables = db.connection.execute(
            "SELECT name,sql FROM sqlite_master WHERE (type='table') AND (substr(name,1,7) <> 'sqlite_') ORDER BY name"
        ).fetchall()

        yield '-- [Tables]'
        yield '-- ' + ', '.join(row['name'] for row in tables)
        for row in tables:
            yield row['sql']

        yield '-- [Databases]'
        for row in db.connection.execute("PRAGMA database_list"):
            yield '-- %s: %s' % (row[1], row[2])


    def run_match(self):
        """Report each target that has duplicates, followed by its matches."""
        for target, target_data, matches, db in self.match():
            log.default('* %s' % target.user)
            log.default('  %s' % target_data.path)
            matches.sort()
            for match in matches:
                if match.is_remote:
                    log.default('  %s:%s' % (match.source, match.path))
                else:
                    log.default('  %s' % match.path)

    def run_view(self):
        """Pretty-print every row yielded by view().

        Columns: hash plus a '*' flag for rows with mark == 0, then the
        locale-formatted mtime, the grouped size (right-aligned), and the path.
        """
        try:
            # Locale-preferred date/time format, e.g. '%a %d %b %Y %T %Z'.
            timeformat = locale.nl_langinfo(locale.D_T_FMT)
        except (AttributeError, ValueError):
            # nl_langinfo is unavailable on some platforms (e.g. Windows);
            # previously a bare except: which also swallowed KeyboardInterrupt.
            timeformat = '%Y-%m-%d %H:%M:%S'
        # Column width of the largest size we expect, grouped per the locale.
        numpadd = len(locale.format('%d', 2**31, True))
        for row, db in self.view():
            mark = '*' if row.mark == 0 else ' '
            size = locale.format('%d', row.size, True)
            mtime = time.strftime(timeformat, time.localtime(row.time))
            log.default(
                # hash:mark  time  size  path
                '%s%s %s  %*s  %s' % (
                row.hash,
                mark,
                mtime,
                numpadd,
                size,
                row.path
            ))

    def view(self):
        """Yield (row_data, db) for every stored row under the configured
        targets, in display order.

        Yields nothing if the database cannot be opened.
        """
        # Setup logging (was self.setting_verbosity, which does not exist --
        # every other method reads self.settings.verbosity).
        log.setLevel(self.settings.verbosity)

        # Display configuration (was a call to undefined display_settings();
        # match the other methods' style).
        log.debug('%s' % self.settings)

        # Setup database
        log.debug('* setup database\'s...')
        db = HashDatabase(self.settings.database)
        # NOTE(review): attribute names below follow the settings.X pattern
        # used elsewhere in this class -- confirm 'combine'/'view_targets'
        # against the ConfigSettings definition.
        db.add_combines(self.settings.combine)
        if not db.open():
            return

        # Read mounts (for truepath)
        mounts = MountEntries()

        # Build a query string, filtering on targets.  A root-like target
        # ('/', '\\', ...) means "everything", so no filter is emitted.
        targets = [mounts.truepath(t) for t in self.settings.view_targets]
        qfilters = []
        qargmap  = {}
        if  ('/'    not in targets)\
        and ('\\'   not in targets)\
        and ('//'   not in targets)\
        and ('\\\\' not in targets):
            for i, target in enumerate(targets):
                # (targets are already canonicalized above; the original
                # redundantly applied mounts.truepath() a second time here)
                # Match the target path itself, or anything below it.
                qfilters.append(
                    r"""(path = :%(name)s) OR (substr(path, 1, :%(name)s_len + 1) = :%(name)s || '/')""" % { 'name': 't%02d' % i}
                )
                qargmap.update({
                    't%02d' % i: target,
                    't%02d_len' % i: len(target)
                })
        qfilter = (r"""WHERE """ + r""" OR """.join(qfilters)) if len(qfilters) != 0 else r""""""
        # Depth-first output orders by plain path; breadth-first orders by
        # component count first.
        qorder = r"""
            ORDER BY
                path,
                mark DESC
        """ if self.settings.walk_depth else r"""
            ORDER BY
                count_components(path),
                path,
                mark DESC
        """

        query = r"""
            SELECT
                *
            FROM
                combinedtab
        """ + qfilter + qorder

        # yield all results as a HashRowData blob (don't expose the underlying row)
        # NOTE(review): HashRowData is not imported in this file -- confirm
        # where it is defined.
        for row in db.connection.execute(query, qargmap):
            yield (HashRowData(path=row['path'], hash=row['hash'], mark=row['mark'], time=row['time'], size=row['size']), db)

    def run_hash(self):
        """Print '<hash>  <path>' for every file produced by hash()."""
        for target, digest, db in self.hash():
            log.default('%s  %s' % (digest, target.user))

    def hash(self, targets=None):
        """Walk *targets* (default: settings.hash_targets) and yield
        (target, hash, db) for every file whose hash is cached or newly
        computed.  New hashes are written back to the database; commits are
        batched by the THRESHHOLD_* constants.

        Yields nothing if the database cannot be opened.
        """
        # Setup logging
        log.setLevel(self.settings.verbosity)

        # Display configuration
        log.debug('%s' % self.settings)

        # Setup database
        log.debug('* setup database\'s...')
        db = HashDatabase2(self.settings.database)
        db.extend_locals(self.settings.databases_locals)
        if not db.open():
            return

        # Setup the walker
        log.debug('* setup walker...')
        walker = Walker()
        walker.walk_depth = self.settings.walk_depth
        walker.extend_targets(targets or self.settings.hash_targets)
        walker.extend_skip_fstypes(self.settings.skip_fstypes)
        walker.extend_skip_paths(self.settings.skip_paths)
        walker.extend_skip_names(self.settings.skip_names)
        walker.extend_skip_dirnames(self.settings.skip_dirnames)
        walker.extend_skip_filenames(self.settings.skip_filenames)
        walker.skip_mounts = self.settings.skip_mounts
        walker.skip_binds = self.settings.skip_binds
        walker.skip_symlinks = self.settings.skip_symlinks

        log.debug('* walk...')

        try:
            start_time    = time.time()
            start_changes = db.connection.total_changes
            for target in walker.walk():
                # Use the cached hash when available; otherwise hash the
                # file and record it together with its stat data.
                target_hash = db.path_hash(target.true, target.stat)
                if target_hash is None:
                    target_hash = build_hash(target)
                    if target_hash is not None:
                        db.path_setstat(target.true, target.stat, target_hash)
                if target_hash is not None:
                    yield (target, target_hash, db)

                # Only commit every so often since we are limited by disk
                # speed.  The counters reset only after an actual commit so
                # that elapsed time and change counts accumulate across
                # iterations (previously they were reset every iteration,
                # so the thresholds could effectively never trigger).
                now = time.time()
                if ((now - start_time) >= THRESHHOLD_TIMEDELTA)\
                or ((db.connection.total_changes - start_changes) > THRESHHOLD_CHANGEDELTA):
                    log.debug('* committing changes...')
                    db.connection.commit()
                    start_time    = time.time()
                    start_changes = db.connection.total_changes
        finally:
            log.debug('* committing changes...')
            db.connection.commit()

    def match(self):
        """Yield (target, target_data, matches, db) for every hashed target
        that has at least one duplicate in the database.

        Each (hash, size) pair is reported only once.  When
        settings.match_verify is set, local candidates are re-stat'ed and
        re-hashed before being accepted.
        """
        # Record matched (hash, size) pairs so each duplicate set is
        # reported only once.
        matches_done = set()

        # db stays None if hash() yields nothing (e.g. the database failed
        # to open); previously the finally clause raised NameError then.
        db = None

        # Iterate over targets
        try:
            for target, hash, db in self.hash(targets=self.settings.match_targets):
                matches = []

                # Find row data
                target_data = db.path_get_prime(target.true)
                if not target_data:
                    log.debug(PREFIX_SKIP + '%r (unable to find data)' % target.user)
                    continue

                # Already reported?
                if (target_data.hash, target_data.size) in matches_done:
                    log.debug(PREFIX_SKIP + '%r (already reported match)' % target.user)
                    continue
                matches_done.add((target_data.hash, target_data.size))

                # Search for duplicates
                for match_data in db.path_select_duplicates(path=target_data.path, hash=target_data.hash, size=target_data.size):
                    if not match_data.is_remote:
                        # local
                        if self.settings.match_verify:
                            # Modernized from the Python-2-only
                            # 'except OSError, ex' form; valid on 2.6+ too.
                            try:
                                match_stat = os.lstat(match_data.path)
                            except (OSError, IOError) as ex:
                                log.debug(PREFIX_SKIP + '%r (unable to lstat match): %s' % (match_data.path, ex))
                                continue

                            if not stat.S_ISREG(match_stat.st_mode):
                                log.debug(PREFIX_SKIP + '%r (not a regular file)' % match_data.path)
                                continue # No longer a regular file

                            # Verify/update hash
                            match_hash = db.path_hash(match_data.path, match_stat)
                            if match_hash is None:
                                match_hash = build_hash(Walker.Target(match_data.path, match_data.path, match_stat))
                                if match_hash is not None:
                                    db.path_setstat(match_data.path, match_stat, match_hash)
                            if match_hash is None:
                                log.debug(PREFIX_SKIP + '%r (unable to determine hash)' % match_data.path)
                                continue

                            # update data
                            match_data = match_data._replace(hash=match_hash, size=match_stat.st_size, time=match_stat.st_mtime, mark=db.mark)
                            if (match_data.hash != target_data.hash) or (match_data.size != target_data.size):
                                log.debug(PREFIX_SKIP + '%r (files no longer match)' % match_data.path)
                                continue # skip if its no longer identical

                        log.verbose('comp %s' % match_data.path)
                    else:
                        # remote
                        log.verbose('comp %s:%s' % (match_data.source, match_data.path))

                    # Add the match
                    matches.append(match_data)
                if len(matches) != 0:
                    yield (target, target_data, matches, db)
        finally:
            if db is not None:
                db.connection.commit()

def main():
    """Command-line entry point: build the app from argv, run it, and exit
    with its return value as the process status."""
    application = AppHashDB(cmdline=True)
    exit(application.run())


if __name__ == '__main__':
    main()